package org.wisdomdata.selenium.strategy;

import java.util.logging.Logger;

import org.wisdomdata.framework.Extractor;
import org.wisdomdata.selenium.SeleniumStringExtractor;

/**
 * Holds the counters used to work out how many pages of a paginated listing
 * still have to be crawled: the total record count of the current crawl, the
 * total record count of the previous crawl, and the number of records per page.
 *
 * <p>The page size can either be set directly or be read through an
 * {@link Extractor}; the current total can likewise be read through an
 * {@link Extractor} via {@link #getCurrentTotalNumber()}. Extraction is
 * best-effort: whenever a number cannot be obtained the methods of this class
 * log the problem and fall back to {@code 0} instead of throwing.
 */
public class RecrawlStrategy {
	private static final Logger logger =
			Logger.getLogger(RecrawlStrategy.class.getName());

	/** Flag stored and returned as-is by {@link #isLastPage()} / {@link #setLastPage(boolean)}. */
	private boolean lastPage;

	/**
	 * Extractor used to read the total record count of the current crawl from
	 * the page. Optional; when it is {@code null},
	 * {@link #getCurrentTotalNumber()} returns {@code 0}.
	 */
	private Extractor currentTotalNumberExtractor;

	/** Total record count of the current crawl. */
	private int totalNumber = 0;

	/** Total record count of the previous crawl. */
	private int lastTimeTotalNumber = 0;

	/**
	 * Extractor used to read the number of records per page from the page.
	 * Only consulted when {@link #eachPageNumber} has not been set to a
	 * positive value.
	 */
	private Extractor eachPageNumberExtractor;

	/** Explicitly configured number of records per page; values {@code <= 0} mean "not set". */
	private int eachPageNumber = 0;

	/** @return the stored last-page flag */
	public boolean isLastPage() {
		return lastPage;
	}

	/** @param lastPage the last-page flag to store */
	public void setLastPage(boolean lastPage) {
		this.lastPage = lastPage;
	}

	/** @return the extractor for the current total record count, possibly {@code null} */
	public Extractor getCurrentTotalNumberExtractor() {
		return currentTotalNumberExtractor;
	}

	/** @param currentTotalNumberExtractor the extractor for the current total record count */
	public void setCurrentTotalNumberExtractor(Extractor currentTotalNumberExtractor) {
		this.currentTotalNumberExtractor = currentTotalNumberExtractor;
	}

	/** @return the stored total record count of the current crawl */
	public int getTotalNumber() {
		return totalNumber;
	}

	/** @param totalNumber the total record count of the current crawl */
	public void setTotalNumber(int totalNumber) {
		this.totalNumber = totalNumber;
	}

	/** @return the stored total record count of the previous crawl */
	public int getLastTimeTotalNumber() {
		return this.lastTimeTotalNumber;
	}

	/** @param lastTimeTotalNumber the total record count of the previous crawl */
	public void setLastTimeTotalNumber(int lastTimeTotalNumber) {
		this.lastTimeTotalNumber = lastTimeTotalNumber;
	}

	/** @return the extractor for the number of records per page, possibly {@code null} */
	public Extractor getEachPageNumberExtractor() {
		return eachPageNumberExtractor;
	}

	/** @param eachPageNumberExtractor the extractor for the number of records per page */
	public void setEachPageNumberExtractor(Extractor eachPageNumberExtractor) {
		this.eachPageNumberExtractor = eachPageNumberExtractor;
	}

	/**
	 * Returns the number of records per page.
	 *
	 * <p>An explicitly set positive value wins. Otherwise the configured
	 * extractor, if any, is run and its parsed result is returned (the
	 * extracted value is not cached in this object). When neither source is
	 * available the result is {@code 0}.
	 *
	 * @return the page size, or a value {@code <= 0} when it is unknown
	 */
	public int getEachPageNumber() {
		if (this.eachPageNumber > 0) {
			return this.eachPageNumber;
		}
		int number = 0;
		if (this.getEachPageNumberExtractor() != null) {
			number = getNumberAction(this.getEachPageNumberExtractor());
			if (number <= 0) {
				logger.info("can not auto locate each page!");
			}
		}
		return number;
	}

	/** @param eachPageNumber the number of records per page; values {@code <= 0} mean "not set" */
	public void setEachPageNumber(int eachPageNumber) {
		this.eachPageNumber = eachPageNumber;
	}

	/**
	 * Runs the given extractor and parses its textual result as an integer.
	 *
	 * @param extractor the extractor to run; expected to be a
	 *        {@link SeleniumStringExtractor}
	 * @return the parsed number, or {@code 0} when the extractor is
	 *         {@code null}, is not a {@link SeleniumStringExtractor}, yields
	 *         no text, or yields text that is not an integer
	 */
	private int getNumberAction(Extractor extractor) {
		if (extractor == null) {
			return 0;
		}
		extractor.extract();
		// Only SeleniumStringExtractor exposes the textual result; any other
		// implementation would previously have failed with a ClassCastException.
		if (!(extractor instanceof SeleniumStringExtractor)) {
			logger.warning("RecrawlStrategy: extractor " + extractor.getClass().getName()
					+ " is not a SeleniumStringExtractor, can not read a number from it!");
			return 0;
		}
		String result = ((SeleniumStringExtractor) extractor).getExtractResult();
		if (result == null) {
			return 0;
		}
		try {
			// Integer.parseInt rejects surrounding whitespace, so strip it first.
			return Integer.parseInt(result.trim());
		} catch (NumberFormatException e) {
			logger.info("RecrawlStrategy: can not parse the " + result + " to number!");
			return 0;
		}
	}

	/**
	 * Computes how many pages hold the records added since the previous crawl,
	 * i.e. the difference between the current and the previous total, divided
	 * by the page size and rounded up.
	 *
	 * @return the number of pages to crawl; {@code 0} when the current total is
	 *         not greater than the previous one or when the page size is unknown
	 */
	public int getNeedCrawlPageNumber() {
		int total = this.getTotalNumber();
		int lastTotal = this.getLastTimeTotalNumber();
		if (total <= lastTotal) {
			logger.warning("SeleniumClickLoopProcessor: last time total page number = this time, means do nothing!");
			return 0;
		}
		// Read the page size once: the getter may run an extractor on each call.
		int pageSize = this.getEachPageNumber();
		if (pageSize <= 0) {
			// Guard against division by zero when the page size could not be determined.
			logger.warning("RecrawlStrategy: each page number is unknown, can not compute the page number to crawl!");
			return 0;
		}
		int newRecords = total - lastTotal;
		int fullPages = newRecords / pageSize;
		if (newRecords % pageSize == 0) {
			return fullPages;
		}
		return fullPages + 1;
	}

	/**
	 * Computes the remainder of the newly added records (current total minus
	 * previous total) divided by the page size.
	 *
	 * @return the remainder; {@code 0} when the new records fill whole pages
	 *         exactly, when the current total is not greater than the previous
	 *         one, or when the page size is unknown
	 */
	public int getLastPageNeedCrawlItemsNumber() {
		int total = this.getTotalNumber();
		int lastTotal = this.getLastTimeTotalNumber();
		if (total <= lastTotal) {
			return 0;
		}
		int pageSize = this.getEachPageNumber();
		if (pageSize <= 0) {
			// Guard against division by zero when the page size could not be determined.
			logger.warning("RecrawlStrategy: each page number is unknown, can not compute the last page items!");
			return 0;
		}
		return (total - lastTotal) % pageSize;
	}

	/**
	 * Reads the current total record count through the configured extractor.
	 * The value set via {@link #setTotalNumber(int)} is not consulted and is
	 * not updated by this method.
	 *
	 * @return the extracted total, or {@code 0} when no extractor is configured
	 *         or the total could not be extracted
	 */
	public int getCurrentTotalNumber() {
		if (this.getCurrentTotalNumberExtractor() == null) {
			return 0;
		}
		int number = getNumberAction(this.getCurrentTotalNumberExtractor());
		// A total that cannot be located is reported as 0 by getNumberAction.
		if (number <= 0) {
			logger.warning("can not auto locate total number!");
		}
		return number;
	}

}
